# -*- coding: utf-8 -*-
# @Time    : 2021/6/10 17:04
# @Author  : 万方名
# @FileName: 多线程模板.py


import pickle
import threading
import pandas as pd
from time import time
from tqdm import tqdm
from itertools import chain, islice
from patpynamo.tables.text import batch_get_tacd
from patpynamo.tables import PatentSemanticFeature


def chunk_iter(iterable, size):
    """Lazily split ``iterable`` into consecutive lists of up to ``size`` items.

    Args:
        iterable: Any iterable; it is consumed exactly once, front to back.
        size: Maximum number of items per yielded list. ``size - 1`` is
            handed to ``itertools.islice``, so ``size`` must be at least 1.

    Yields:
        Lists holding the next ``size`` items in order. The last list is
        shorter when the input length is not a multiple of ``size``. An
        empty input yields nothing.
    """
    source = iter(iterable)
    while True:
        # Pull the leading item by hand so an exhausted source ends the
        # generator instead of producing an empty trailing chunk.
        try:
            head = next(source)
        except StopIteration:
            return
        batch = [head]
        batch.extend(islice(source, size - 1))
        yield batch


class MyThread(threading.Thread):
    """Thread that runs ``func(*args)`` and remembers its return value.

    Args:
        func: Callable executed in the new thread.
        args: Positional arguments passed to ``func``.
    """

    def __init__(self, func, args=()):
        super().__init__()
        self.func = func
        self.args = args
        # Initialised up front so get_result() is well defined even when the
        # thread has not been started/joined yet, or when func raised.
        self.result = None

    def run(self):
        """Call the wrapped function and store whatever it returns."""
        self.result = self.func(*self.args)

    def get_result(self):
        """Return the stored return value of ``func``.

        Returns:
            The value returned by ``func``, or ``None`` when ``run`` has not
            completed successfully. Call ``join()`` first to be sure the
            worker has finished.
        """
        return self.result


def process_(file_name, index):
    """Fetch TACD records for every patent id in one CSV and pickle them.

    Reads the ``patent_id`` column of ``file_name``, requests the ids from
    ``batch_get_tacd`` in batches of 20, and writes each returned record to
    ``tacd_{index}.pickle`` with its own ``pickle.dump`` call. The output file
    therefore holds a sequence of pickled objects, not a single list; read it
    back with repeated ``pickle.load`` calls until ``EOFError``.

    Args:
        file_name: Path of the CSV file; it must contain a ``patent_id`` column.
        index: Value used to build the output file name.
    """
    df = pd.read_csv(file_name)
    all_pids = list(df['patent_id'].values)

    start_time = time()

    # fetch TACD
    # The context manager guarantees the file is closed even when a batch
    # request or a dump raises part-way through.
    with open(f"tacd_{index}.pickle", "wb") as out_file:
        for chunk_pid in tqdm(chunk_iter(all_pids, size=20)):
            for record in batch_get_tacd(chunk_pid, lang='CN', add_trans=False):
                pickle.dump(record, out_file)

    print(f'{file_name}拉取完毕,共花：{time() - start_time}')


def main():
    """Process the 20 CSV shards concurrently, one thread per shard.

    Starts a ``MyThread`` running ``process_`` for each
    ``./data/df_cut_{i}.csv`` with ``i`` in ``0..19``, waits for all of them,
    and prints the total elapsed time.
    """
    began = time()
    workers = []
    for shard in range(20):
        worker = MyThread(process_, (f'./data/df_cut_{shard}.csv', shard))
        worker.start()
        workers.append(worker)

    # Block until every worker is done; otherwise the main thread would
    # report the elapsed time before the downloads have finished.
    for worker in workers:
        worker.join()

    elapsed = time() - began
    print('所有TACD拉取完毕，共花费{}秒。'.format(elapsed))


# Start the download only when this file is executed as a script, so that
# importing it does not spawn any threads.
if __name__ == '__main__':
    main()

